home *** CD-ROM | disk | FTP | other *** search
/ PC World Komputer 2010 April / PCWorld0410.iso / hity wydania / Ubuntu 9.10 PL / karmelkowy-koliberek-9.10-netbook-remix-PL.iso / casper / filesystem.squashfs / usr / lib / python2.6 / email / feedparser.pyc (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2009-11-11  |  10.9 KB  |  427 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. __all__ = [
  21.     'FeedParser']
  22. import re
  23. from email import errors
  24. from email import message
  25. NLCRE = re.compile('\r\n|\r|\n')
  26. NLCRE_bol = re.compile('(\r\n|\r|\n)')
  27. NLCRE_eol = re.compile('(\r\n|\r|\n)$')
  28. NLCRE_crack = re.compile('(\r\n|\r|\n)')
  29. headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{1,}:|[\\t ])')
  30. EMPTYSTRING = ''
  31. NL = '\n'
  32. NeedMoreData = object()
  33.  
  34. class BufferedSubFile(object):
  35.     '''A file-ish object that can have new data loaded into it.
  36.  
  37.     You can also push and pop line-matching predicates onto a stack.  When the
  38.     current predicate matches the current line, a false EOF response
  39.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  40.     simple abstraction -- it parses until EOF closes the current message.
  41.     '''
  42.     
  43.     def __init__(self):
  44.         self._partial = ''
  45.         self._lines = []
  46.         self._eofstack = []
  47.         self._closed = False
  48.  
  49.     
  50.     def push_eof_matcher(self, pred):
  51.         self._eofstack.append(pred)
  52.  
  53.     
  54.     def pop_eof_matcher(self):
  55.         return self._eofstack.pop()
  56.  
  57.     
  58.     def close(self):
  59.         self._lines.append(self._partial)
  60.         self._partial = ''
  61.         self._closed = True
  62.  
  63.     
  64.     def readline(self):
  65.         if not self._lines:
  66.             if self._closed:
  67.                 return ''
  68.             return NeedMoreData
  69.         line = self._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.         
  75.         return line
  76.  
  77.     
  78.     def unreadline(self, line):
  79.         if not line is not NeedMoreData:
  80.             raise AssertionError
  81.         self._lines.append(line)
  82.  
  83.     
  84.     def push(self, data):
  85.         '''Push some new data into this object.'''
  86.         data = self._partial + data
  87.         self._partial = ''
  88.         parts = NLCRE_crack.split(data)
  89.         self._partial = parts.pop()
  90.         lines = []
  91.         for i in range(len(parts) // 2):
  92.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  93.         
  94.         self.pushlines(lines)
  95.  
  96.     
  97.     def pushlines(self, lines):
  98.         self._lines[:0] = lines[::-1]
  99.  
  100.     
  101.     def is_closed(self):
  102.         return self._closed
  103.  
  104.     
  105.     def __iter__(self):
  106.         return self
  107.  
  108.     
  109.     def next(self):
  110.         line = self.readline()
  111.         if line == '':
  112.             raise StopIteration
  113.         line == ''
  114.         return line
  115.  
  116.  
  117.  
  118. class FeedParser:
  119.     '''A feed-style parser of email.'''
  120.     
  121.     def __init__(self, _factory = message.Message):
  122.         '''_factory is called with no arguments to create a new message obj'''
  123.         self._factory = _factory
  124.         self._input = BufferedSubFile()
  125.         self._msgstack = []
  126.         self._parse = self._parsegen().next
  127.         self._cur = None
  128.         self._last = None
  129.         self._headersonly = False
  130.  
  131.     
  132.     def _set_headersonly(self):
  133.         self._headersonly = True
  134.  
  135.     
  136.     def feed(self, data):
  137.         '''Push more data into the parser.'''
  138.         self._input.push(data)
  139.         self._call_parse()
  140.  
  141.     
  142.     def _call_parse(self):
  143.         
  144.         try:
  145.             self._parse()
  146.         except StopIteration:
  147.             pass
  148.  
  149.  
  150.     
  151.     def close(self):
  152.         '''Parse all remaining data and return the root message object.'''
  153.         self._input.close()
  154.         self._call_parse()
  155.         root = self._pop_message()
  156.         if not not (self._msgstack):
  157.             raise AssertionError
  158.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  159.             root.defects.append(errors.MultipartInvariantViolationDefect())
  160.         
  161.         return root
  162.  
  163.     
  164.     def _new_message(self):
  165.         msg = self._factory()
  166.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  167.             msg.set_default_type('message/rfc822')
  168.         
  169.         if self._msgstack:
  170.             self._msgstack[-1].attach(msg)
  171.         
  172.         self._msgstack.append(msg)
  173.         self._cur = msg
  174.         self._last = msg
  175.  
  176.     
  177.     def _pop_message(self):
  178.         retval = self._msgstack.pop()
  179.         if self._msgstack:
  180.             self._cur = self._msgstack[-1]
  181.         else:
  182.             self._cur = None
  183.         return retval
  184.  
  185.     
  186.     def _parsegen(self):
  187.         self._new_message()
  188.         headers = []
  189.         for line in self._input:
  190.             if line is NeedMoreData:
  191.                 yield NeedMoreData
  192.                 continue
  193.             
  194.             if not headerRE.match(line):
  195.                 if not NLCRE.match(line):
  196.                     self._input.unreadline(line)
  197.                 
  198.                 break
  199.             
  200.             headers.append(line)
  201.         
  202.         self._parse_headers(headers)
  203.         if self._headersonly:
  204.             lines = []
  205.             while True:
  206.                 line = self._input.readline()
  207.                 if line is NeedMoreData:
  208.                     yield NeedMoreData
  209.                     continue
  210.                 
  211.                 if line == '':
  212.                     break
  213.                 
  214.                 lines.append(line)
  215.             self._cur.set_payload(EMPTYSTRING.join(lines))
  216.             return None
  217.         if self._cur.get_content_type() == 'message/delivery-status':
  218.             while True:
  219.                 self._input.push_eof_matcher(NLCRE.match)
  220.                 for retval in self._parsegen():
  221.                     pass
  222.                 
  223.                 msg = self._pop_message()
  224.                 self._input.pop_eof_matcher()
  225.                 while True:
  226.                     line = self._input.readline()
  227.                     break
  228.                     continue
  229.                     None if line is NeedMoreData else None if retval is NeedMoreData else self._headersonly
  230.                 while True:
  231.                     line = self._input.readline()
  232.                     if line is NeedMoreData:
  233.                         yield NeedMoreData
  234.                         continue
  235.                     
  236.                     break
  237.                 if line == '':
  238.                     break
  239.                 
  240.                 self._input.unreadline(line)
  241.             return None
  242.         if self._cur.get_content_maintype() == 'message':
  243.             for retval in self._parsegen():
  244.                 if retval is NeedMoreData:
  245.                     yield NeedMoreData
  246.                     self._cur.get_content_type() == 'message/delivery-status'
  247.                     continue
  248.                 
  249.             
  250.             self._pop_message()
  251.             return None
  252.         if self._cur.get_content_maintype() == 'multipart':
  253.             boundary = self._cur.get_boundary()
  254.             if boundary is None:
  255.                 self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
  256.                 lines = []
  257.                 for line in self._input:
  258.                     lines.append(line)
  259.                 
  260.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  261.                 return None
  262.             separator = '--' + boundary
  263.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  264.             capturing_preamble = True
  265.             preamble = []
  266.             linesep = False
  267.             while True:
  268.                 line = self._input.readline()
  269.                 if line == '':
  270.                     break
  271.                 
  272.                 mo = boundaryre.match(line)
  273.                 if mo:
  274.                     if mo.group('end'):
  275.                         linesep = mo.group('linesep')
  276.                         break
  277.                     
  278.                     if capturing_preamble:
  279.                         if preamble:
  280.                             lastline = preamble[-1]
  281.                             eolmo = NLCRE_eol.search(lastline)
  282.                             if eolmo:
  283.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  284.                             
  285.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  286.                         
  287.                         capturing_preamble = False
  288.                         self._input.unreadline(line)
  289.                         continue
  290.                     
  291.                     while True:
  292.                         line = self._input.readline()
  293.                         if line is NeedMoreData:
  294.                             yield NeedMoreData
  295.                             continue
  296.                         
  297.                         mo = boundaryre.match(line)
  298.                         if not mo:
  299.                             self._input.unreadline(line)
  300.                             break
  301.                             continue
  302.                     self._input.push_eof_matcher(boundaryre.match)
  303.                     for retval in self._parsegen():
  304.                         if retval is NeedMoreData:
  305.                             yield NeedMoreData
  306.                             continue
  307.                         
  308.                     
  309.                     if self._last.get_content_maintype() == 'multipart':
  310.                         epilogue = self._last.epilogue
  311.                         if epilogue == '':
  312.                             self._last.epilogue = None
  313.                         elif epilogue is not None:
  314.                             mo = NLCRE_eol.search(epilogue)
  315.                             if mo:
  316.                                 end = len(mo.group(0))
  317.                                 self._last.epilogue = epilogue[:-end]
  318.                             
  319.                         
  320.                     else:
  321.                         payload = self._last.get_payload()
  322.                         if isinstance(payload, basestring):
  323.                             mo = NLCRE_eol.search(payload)
  324.                             if mo:
  325.                                 payload = payload[:-len(mo.group(0))]
  326.                                 self._last.set_payload(payload)
  327.                             
  328.                         
  329.                     self._input.pop_eof_matcher()
  330.                     self._pop_message()
  331.                     self._last = self._cur
  332.                     continue
  333.                 if not capturing_preamble:
  334.                     raise AssertionError
  335.                 preamble.append(line)
  336.                 continue
  337.                 capturing_preamble
  338.             if capturing_preamble:
  339.                 self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
  340.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  341.                 epilogue = []
  342.                 for line in self._input:
  343.                     if line is NeedMoreData:
  344.                         yield NeedMoreData
  345.                         continue
  346.                         continue
  347.                 
  348.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  349.                 return None
  350.             if linesep:
  351.                 epilogue = [
  352.                     '']
  353.             else:
  354.                 epilogue = []
  355.             for line in self._input:
  356.                 if line is NeedMoreData:
  357.                     yield NeedMoreData
  358.                     continue
  359.                 
  360.                 epilogue.append(line)
  361.             
  362.             if epilogue:
  363.                 firstline = epilogue[0]
  364.                 bolmo = NLCRE_bol.match(firstline)
  365.                 if bolmo:
  366.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  367.                 
  368.             
  369.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  370.             return None
  371.         lines = []
  372.         for line in self._input:
  373.             lines.append(line)
  374.         
  375.         self._cur.set_payload(EMPTYSTRING.join(lines))
  376.  
  377.     
  378.     def _parse_headers(self, lines):
  379.         lastheader = ''
  380.         lastvalue = []
  381.         for lineno, line in enumerate(lines):
  382.             if line[0] in ' \t':
  383.                 if not lastheader:
  384.                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
  385.                     self._cur.defects.append(defect)
  386.                     continue
  387.                 
  388.                 lastvalue.append(line)
  389.                 continue
  390.             
  391.             if lastheader:
  392.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  393.                 self._cur[lastheader] = lhdr
  394.                 lastheader = ''
  395.                 lastvalue = []
  396.             
  397.             if line.startswith('From '):
  398.                 if lineno == 0:
  399.                     mo = NLCRE_eol.search(line)
  400.                     if mo:
  401.                         line = line[:-len(mo.group(0))]
  402.                     
  403.                     self._cur.set_unixfrom(line)
  404.                     continue
  405.                 elif lineno == len(lines) - 1:
  406.                     self._input.unreadline(line)
  407.                     return None
  408.                 defect = errors.MisplacedEnvelopeHeaderDefect(line)
  409.                 self._cur.defects.append(defect)
  410.                 continue
  411.             
  412.             i = line.find(':')
  413.             if i < 0:
  414.                 defect = errors.MalformedHeaderDefect(line)
  415.                 self._cur.defects.append(defect)
  416.                 continue
  417.             
  418.             lastheader = line[:i]
  419.             lastvalue = [
  420.                 line[i + 1:].lstrip()]
  421.         
  422.         if lastheader:
  423.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  424.         
  425.  
  426.  
  427.